Martin2019 (GSE134809_human_ileal_Crohns)

Cell type annotation

In [1]:
# load required modules
import os

import besca as bc
import numpy as np
import pandas as pd
import pkg_resources
import scanpy as sc

#import numpy as np
#from matplotlib import pyplot
#import sys

# Notebook-wide scanpy configuration.
sc.settings.verbosity = 3  # verbosity: errors (0), warnings (1), info (2), hints (3)
# Default figure resolution; a later cell calls set_figure_params again with dpi=90.
sc.settings.set_figure_params(dpi=80)  # low dpi (dots per inch) yields small inline figures
# Print the versions of scanpy and its main dependencies (see the output below).
sc.logging.print_versions()
./.conda/envs/besca_test/lib/python3.6/site-packages/scanpy/api/__init__.py:6: FutureWarning: 

In a future version of Scanpy, `scanpy.api` will be removed.
Simply use `import scanpy as sc` and `import scanpy.external as sce` instead.

  FutureWarning,
scanpy==1.4.5.post2 anndata==0.7.1 umap==0.3.10 numpy==1.17.5 scipy==1.4.1 pandas==0.24.1 scikit-learn==0.22.1 statsmodels==0.11.0 python-igraph==0.8.2 louvain==0.6.1
In [2]:
# Project root: the folder that contains "raw" and "analyzed".
# NOTE: taken from the current working directory, so the notebook has to be
# started from that folder.
root_path = os.getcwd()

# Input: the AnnData file produced by the standard workflow run named below.
analysis_name = 'standard_workflow_besca2_0'
results_folder = os.path.join(root_path, 'analyzed', analysis_name)
input_data = os.path.join(results_folder, analysis_name + '.h5ad')  # existing .h5ad file, read in the next cell

# Output directories; everything is written below the results folder.
outdir_data = results_folder
outdir_figures = os.path.join(results_folder, 'figures')
outdir_results = os.path.join(results_folder, 'results')
sc.settings.figdir = outdir_figures  # figure directory used by scanpy

# Create the output folders if they do not exist yet.
os.makedirs(outdir_data, exist_ok=True)
os.makedirs(outdir_figures, exist_ok=True)
os.makedirs(outdir_results, exist_ok=True)
In [3]:
# Load the AnnData object that the standard workflow wrote to `input_data`.
adata = sc.read(input_data)
In [4]:
# Display a summary of the loaded object (dimensions and available annotations).
adata
Out[4]:
AnnData object with n_obs × n_vars = 62202 × 1417 
    obs: 'CELL', 'CONDITION', 'Sample_geo_accession', 'Sample_title', 'Subject', 'tissue', 'status', '10x chemistry', 'Sample_relation', 'Sample_relation_2', 'Sample_supplementary_file_1', 'Sample_supplementary_file_2', 'Sample_supplementary_file_3', 'ID_REF', 'Barcode', 'Type', 'Cluster', 'Lane', 'Subtype', 'percent_mito', 'n_counts', 'n_genes', 'batch', 'leiden'
    var: 'ENSEMBL', 'SYMBOL', 'n_cells', 'total_counts', 'frac_reads'
    uns: 'leiden', 'leiden_colors', 'neighbors', 'pca', 'rank_genes_groups', 'umap'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
In [5]:
# Preview the first rows of the per-cell metadata table.
adata.obs.head()
Out[5]:
CELL CONDITION Sample_geo_accession Sample_title Subject tissue status 10x chemistry Sample_relation Sample_relation_2 ... Barcode Type Cluster Lane Subtype percent_mito n_counts n_genes batch leiden
index
GSM3972009_69.AAACATACACACCA-1 GSM3972009_69.AAACATACACACCA-1 Involved GSM3972009 Ileal Involved 69 pat. 5 ileal Involved V1 BioSample: https://www.ncbi.nlm.nih.gov/biosam... SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX... ... 69-AAACATACACACCA-1 T 29.0 69.0 Central Memory T cells 0.012165 1233.0 500 pat. 5 0
GSM3972009_69.AAACATTGGTGTCA-1 GSM3972009_69.AAACATTGGTGTCA-1 Involved GSM3972009 Ileal Involved 69 pat. 5 ileal Involved V1 BioSample: https://www.ncbi.nlm.nih.gov/biosam... SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX... ... 69-AAACATTGGTGTCA-1 unknown NaN unknown unknown 0.012548 4142.0 1277 pat. 5 12
GSM3972009_69.AAACGCACTTAGGC-1 GSM3972009_69.AAACGCACTTAGGC-1 Involved GSM3972009 Ileal Involved 69 pat. 5 ileal Involved V1 BioSample: https://www.ncbi.nlm.nih.gov/biosam... SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX... ... 69-AAACGCACTTAGGC-1 Stormal 5.0 69.0 Activated fibroblasts 0.006716 5806.0 1727 pat. 5 12
GSM3972009_69.AAACGCTGCTACCC-1 GSM3972009_69.AAACGCTGCTACCC-1 Involved GSM3972009 Ileal Involved 69 pat. 5 ileal Involved V1 BioSample: https://www.ncbi.nlm.nih.gov/biosam... SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX... ... 69-AAACGCTGCTACCC-1 T 43.0 69.0 Tregs 0.010526 1327.0 627 pat. 5 5
GSM3972009_69.AAACTTGAGTCACA-1 GSM3972009_69.AAACTTGAGTCACA-1 Involved GSM3972009 Ileal Involved 69 pat. 5 ileal Involved V1 BioSample: https://www.ncbi.nlm.nih.gov/biosam... SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX... ... 69-AAACTTGAGTCACA-1 Stormal 10.0 69.0 Pericytes 0.013407 3803.0 1327 pat. 5 21

5 rows × 24 columns

In [ ]:
 
In [6]:
# Render matplotlib figures inline in the notebook.
%matplotlib inline  
# Use a higher figure resolution for the plots below (the setup cell used dpi=80).
sc.settings.set_figure_params(dpi=90)
In [7]:
# UMAP of the Leiden clusters next to the authors' coarse 'Type' annotation,
# with the category labels drawn on top of the data points.
sc.pl.umap(
    adata,
    color=['leiden', 'Type'],
    legend_loc='on data',
    legend_fontsize=9,
)
In [8]:
# UMAP of the Leiden clusters next to the authors' finer 'Subtype' annotation;
# a smaller legend font is used for the on-data labels.
sc.pl.umap(
    adata,
    color=['leiden', 'Subtype'],
    legend_loc='on data',
    legend_fontsize=6,
)
In [9]:
# UMAP coloured by 'CD4' and 'CD8A' (not obs columns, so presumably resolved
# as gene names in adata.var_names).
sc.pl.umap(
    adata,
    color=['CD4', 'CD8A'],
)
In [10]:
# 'Subtype' annotation alone, with the default legend placement.
sc.pl.umap(
    adata,
    color=['Subtype'],
    legend_fontsize=9,
)
In [11]:
# Locate the immune-signature GMT file that ships with the besca package
# (pkg_resources is imported with the other modules at the top of the notebook).
gmt_file_IMM = pkg_resources.resource_filename('besca', 'datasets/genesets/HumanCD45p_scseqCMs6.gmt')

# Compute the signature scores on a copy: the score columns are added to
# adata_with_scores.obs while adata itself is left as loaded.
adata_with_scores = adata.copy()
bc.tl.sig.combined_signature_score(adata_with_scores, gmt_file_IMM)
computing score 'score_HumanCD45p_scseqCMs6_ActB_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_ActB_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Activation_scanpyUP'
WARNING: genes are not in var_names and ignored: ['FCGR3']
    finished: added
    'score_HumanCD45p_scseqCMs6_Activation_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Basophil_scanpyUP'
WARNING: genes are not in var_names and ignored: ['CCR3']
    finished: added
    'score_HumanCD45p_scseqCMs6_Basophil_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Bcells_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Bcells_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_CCG1S_scanpyUP'
WARNING: genes are not in var_names and ignored: ['DSSC1']
    finished: added
    'score_HumanCD45p_scseqCMs6_CCG1S_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_CCG2M_scanpyUP'
WARNING: genes are not in var_names and ignored: ['CSK2']
    finished: added
    'score_HumanCD45p_scseqCMs6_CCG2M_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Cafs_scanpyUP'
WARNING: genes are not in var_names and ignored: ['FDF7']
    finished: added
    'score_HumanCD45p_scseqCMs6_Cafs_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Cellcycle_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Cellcycle_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Checkpoint_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Checkpoint_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Cyto_scanpyUP'
WARNING: genes are not in var_names and ignored: ['TNFA', 'IL7A', 'IL8', 'IL12', 'IL23', 'GM-CSF', 'GCSFCCL1', 'CCL12', 'CCL27', 'SDF1A', 'BCA1', 'MIP1B']
    finished: added
    'score_HumanCD45p_scseqCMs6_Cyto_scanpyUP', score of gene set (adata.obs) (0:00:01)
computing score 'score_HumanCD45p_scseqCMs6_Cytotox_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Cytotox_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_DCR_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_DCR_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_DCrec_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_DCrec_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_DCs_scanpyUP'
WARNING: genes are not in var_names and ignored: ['LY6C1', 'SIGLECH']
    finished: added
    'score_HumanCD45p_scseqCMs6_DCs_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Eff_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Eff_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Endo_scanpyUP'
WARNING: genes are not in var_names and ignored: ['ITCAM1']
    finished: added
    'score_HumanCD45p_scseqCMs6_Endo_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Endot_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Endot_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Endothelial_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Endothelial_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Eosinophil_scanpyUP'
WARNING: genes are not in var_names and ignored: ['CCR3', 'SLIGLEC10']
    finished: added
    'score_HumanCD45p_scseqCMs6_Eosinophil_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Epith_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Epith_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_ExhB_scanpyUP'
WARNING: genes are not in var_names and ignored: ['TILPL2']
    finished: added
    'score_HumanCD45p_scseqCMs6_ExhB_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Granulo_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Granulo_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_HLA_scanpyUP'
WARNING: genes are not in var_names and ignored: ['HLA-H', 'HLA-L', 'HLA-DRB2']
    finished: added
    'score_HumanCD45p_scseqCMs6_HLA_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_HLAP_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_HLAP_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_HLAS_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_HLAS_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Ifi_scanpyUP'
WARNING: genes are not in var_names and ignored: ['OAS1G']
    finished: added
    'score_HumanCD45p_scseqCMs6_Ifi_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Ifng_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Ifng_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Macrophage_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Macrophage_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Mast_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Mast_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Megakaryocytes_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Megakaryocytes_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_MelMelan_scanpyUP'
WARNING: genes are not in var_names and ignored: ['TYR', 'SLC45A2', 'SLC24A5', 'MAGEA6', 'PRAME', 'PAX3', 'MLANA']
    finished: added
    'score_HumanCD45p_scseqCMs6_MelMelan_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_MelMesen_scanpyUP'
WARNING: genes are not in var_names and ignored: ['CYR6']
    finished: added
    'score_HumanCD45p_scseqCMs6_MelMesen_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_MemB_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_MemB_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Memory_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Memory_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Mo14_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Mo14_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Mo16_scanpyUP'
WARNING: genes are not in var_names and ignored: ['FCGR3']
    finished: added
    'score_HumanCD45p_scseqCMs6_Mo16_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_MoMa_scanpyUP'
WARNING: genes are not in var_names and ignored: ['FCGR3']
    finished: added
    'score_HumanCD45p_scseqCMs6_MoMa_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Monocytes_scanpyUP'
WARNING: genes are not in var_names and ignored: ['FCGR3', 'FCGR1']
    finished: added
    'score_HumanCD45p_scseqCMs6_Monocytes_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Myelo_scanpyUP'
WARNING: genes are not in var_names and ignored: ['FCGR4', 'FCGR1']
    finished: added
    'score_HumanCD45p_scseqCMs6_Myelo_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_MyeloSubtype_scanpyUP'
WARNING: genes are not in var_names and ignored: ['LY6G']
    finished: added
    'score_HumanCD45p_scseqCMs6_MyeloSubtype_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_NKT_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_NKT_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_NKcells_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_NKcells_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_NKcyt_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_NKcyt_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_NKnai_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_NKnai_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Naive_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Naive_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_NaiveB_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_NaiveB_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Neutrophil_scanpyUP'
WARNING: genes are not in var_names and ignored: ['CEACAM8', 'CXCR1']
    finished: added
    'score_HumanCD45p_scseqCMs6_Neutrophil_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_NonEff_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_NonEff_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_OMyelo_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_OMyelo_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Others_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Others_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Plasma_scanpyUP'
WARNING: genes are not in var_names and ignored: ['IGJ']
    finished: added
    'score_HumanCD45p_scseqCMs6_Plasma_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Pyro_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Pyro_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Stemmess_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Stemmess_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_StemmessS_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_StemmessS_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Stromal_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Stromal_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_T4CM_scanpyUP'
WARNING: genes are not in var_names and ignored: ['TRADO']
    finished: added
    'score_HumanCD45p_scseqCMs6_T4CM_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_TAM_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_TAM_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_TAMCx_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_TAMCx_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_TEM_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_TEM_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_TMO_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_TMO_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_TMid_scanpyUP'
WARNING: genes are not in var_names and ignored: ['ANGTPL4']
    finished: added
    'score_HumanCD45p_scseqCMs6_TMid_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_TNK_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_TNK_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_TStem_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_TStem_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_TStemhi_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_TStemhi_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_TSteml_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_TSteml_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_TStemlo_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_TStemlo_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_TTh1_scanpyUP'
WARNING: genes are not in var_names and ignored: ['SPP4', 'IFNA1']
    finished: added
    'score_HumanCD45p_scseqCMs6_TTh1_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_TTh17_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_TTh17_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_TTh2_scanpyUP'
WARNING: genes are not in var_names and ignored: ['CCR3', 'CSCR4', 'IL5']
    finished: added
    'score_HumanCD45p_scseqCMs6_TTh2_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Tcd4_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Tcd4_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Tcd8_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Tcd8_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Tcells_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Tcells_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Tcgd_scanpyUP'
WARNING: genes are not in var_names and ignored: ['TRDV2']
    finished: added
    'score_HumanCD45p_scseqCMs6_Tcgd_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Tcytox_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Tcytox_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Teff_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Teff_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Tfh_scanpyUP'
WARNING: genes are not in var_names and ignored: ['FLAMF1']
    finished: added
    'score_HumanCD45p_scseqCMs6_Tfh_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_TilCM_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_TilCM_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Tpexh_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Tpexh_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Treg_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Treg_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_Ttexh_scanpyUP'
WARNING: genes are not in var_names and ignored: ['CXCR1']
    finished: added
    'score_HumanCD45p_scseqCMs6_Ttexh_scanpyUP', score of gene set (adata.obs) (0:00:01)
computing score 'score_HumanCD45p_scseqCMs6_Ubi_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Ubi_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_UnivExh_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_UnivExh_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_UnivMem_scanpyUP'
WARNING: genes are not in var_names and ignored: ['CCXR3']
    finished: added
    'score_HumanCD45p_scseqCMs6_UnivMem_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_UnivNaive_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_UnivNaive_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_aDCs_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_aDCs_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_allSteml_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_allSteml_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_cDC1_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_cDC1_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_cDC2_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_cDC2_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_cDCs_scanpyUP'
WARNING: genes are not in var_names and ignored: ['PLET1']
    finished: added
    'score_HumanCD45p_scseqCMs6_cDCs_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_epDCs_scanpyUP'
WARNING: genes are not in var_names and ignored: ['SIGLECG', 'PLET1']
    finished: added
    'score_HumanCD45p_scseqCMs6_epDCs_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_general_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_general_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_moDC_scanpyUP'
WARNING: genes are not in var_names and ignored: ['LYPD2']
    finished: added
    'score_HumanCD45p_scseqCMs6_moDC_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_pDCs_scanpyUP'
WARNING: genes are not in var_names and ignored: ['SIGLECH']
    finished: added
    'score_HumanCD45p_scseqCMs6_pDCs_scanpyUP', score of gene set (adata.obs) (0:00:00)
computing score 'score_HumanCD45p_scseqCMs6_uDCs_scanpyUP'
WARNING: genes are not in var_names and ignored: ['SIGLECG']
    finished: added
    'score_HumanCD45p_scseqCMs6_uDCs_scanpyUP', score of gene set (adata.obs) (0:00:00)
In [12]:
# Display the scored copy; its obs now lists the added 'score_..._scanpy' columns.
adata_with_scores
Out[12]:
AnnData object with n_obs × n_vars = 62202 × 1417 
    obs: 'CELL', 'CONDITION', 'Sample_geo_accession', 'Sample_title', 'Subject', 'tissue', 'status', '10x chemistry', 'Sample_relation', 'Sample_relation_2', 'Sample_supplementary_file_1', 'Sample_supplementary_file_2', 'Sample_supplementary_file_3', 'ID_REF', 'Barcode', 'Type', 'Cluster', 'Lane', 'Subtype', 'percent_mito', 'n_counts', 'n_genes', 'batch', 'leiden', 'score_HumanCD45p_scseqCMs6_ActB_scanpy', 'score_HumanCD45p_scseqCMs6_Activation_scanpy', 'score_HumanCD45p_scseqCMs6_Basophil_scanpy', 'score_HumanCD45p_scseqCMs6_Bcells_scanpy', 'score_HumanCD45p_scseqCMs6_CCG1S_scanpy', 'score_HumanCD45p_scseqCMs6_CCG2M_scanpy', 'score_HumanCD45p_scseqCMs6_Cafs_scanpy', 'score_HumanCD45p_scseqCMs6_Cellcycle_scanpy', 'score_HumanCD45p_scseqCMs6_Checkpoint_scanpy', 'score_HumanCD45p_scseqCMs6_Cyto_scanpy', 'score_HumanCD45p_scseqCMs6_Cytotox_scanpy', 'score_HumanCD45p_scseqCMs6_DCR_scanpy', 'score_HumanCD45p_scseqCMs6_DCrec_scanpy', 'score_HumanCD45p_scseqCMs6_DCs_scanpy', 'score_HumanCD45p_scseqCMs6_Eff_scanpy', 'score_HumanCD45p_scseqCMs6_Endo_scanpy', 'score_HumanCD45p_scseqCMs6_Endot_scanpy', 'score_HumanCD45p_scseqCMs6_Endothelial_scanpy', 'score_HumanCD45p_scseqCMs6_Eosinophil_scanpy', 'score_HumanCD45p_scseqCMs6_Epith_scanpy', 'score_HumanCD45p_scseqCMs6_ExhB_scanpy', 'score_HumanCD45p_scseqCMs6_Granulo_scanpy', 'score_HumanCD45p_scseqCMs6_HLA_scanpy', 'score_HumanCD45p_scseqCMs6_HLAP_scanpy', 'score_HumanCD45p_scseqCMs6_HLAS_scanpy', 'score_HumanCD45p_scseqCMs6_Ifi_scanpy', 'score_HumanCD45p_scseqCMs6_Ifng_scanpy', 'score_HumanCD45p_scseqCMs6_Macrophage_scanpy', 'score_HumanCD45p_scseqCMs6_Mast_scanpy', 'score_HumanCD45p_scseqCMs6_Megakaryocytes_scanpy', 'score_HumanCD45p_scseqCMs6_MelMelan_scanpy', 'score_HumanCD45p_scseqCMs6_MelMesen_scanpy', 'score_HumanCD45p_scseqCMs6_MemB_scanpy', 'score_HumanCD45p_scseqCMs6_Memory_scanpy', 'score_HumanCD45p_scseqCMs6_Mo14_scanpy', 'score_HumanCD45p_scseqCMs6_Mo16_scanpy', 'score_HumanCD45p_scseqCMs6_MoMa_scanpy', 
'score_HumanCD45p_scseqCMs6_Monocytes_scanpy', 'score_HumanCD45p_scseqCMs6_Myelo_scanpy', 'score_HumanCD45p_scseqCMs6_MyeloSubtype_scanpy', 'score_HumanCD45p_scseqCMs6_NKT_scanpy', 'score_HumanCD45p_scseqCMs6_NKcells_scanpy', 'score_HumanCD45p_scseqCMs6_NKcyt_scanpy', 'score_HumanCD45p_scseqCMs6_NKnai_scanpy', 'score_HumanCD45p_scseqCMs6_Naive_scanpy', 'score_HumanCD45p_scseqCMs6_NaiveB_scanpy', 'score_HumanCD45p_scseqCMs6_Neutrophil_scanpy', 'score_HumanCD45p_scseqCMs6_NonEff_scanpy', 'score_HumanCD45p_scseqCMs6_OMyelo_scanpy', 'score_HumanCD45p_scseqCMs6_Others_scanpy', 'score_HumanCD45p_scseqCMs6_Plasma_scanpy', 'score_HumanCD45p_scseqCMs6_Pyro_scanpy', 'score_HumanCD45p_scseqCMs6_Stemmess_scanpy', 'score_HumanCD45p_scseqCMs6_StemmessS_scanpy', 'score_HumanCD45p_scseqCMs6_Stromal_scanpy', 'score_HumanCD45p_scseqCMs6_T4CM_scanpy', 'score_HumanCD45p_scseqCMs6_TAM_scanpy', 'score_HumanCD45p_scseqCMs6_TAMCx_scanpy', 'score_HumanCD45p_scseqCMs6_TEM_scanpy', 'score_HumanCD45p_scseqCMs6_TMO_scanpy', 'score_HumanCD45p_scseqCMs6_TMid_scanpy', 'score_HumanCD45p_scseqCMs6_TNK_scanpy', 'score_HumanCD45p_scseqCMs6_TStem_scanpy', 'score_HumanCD45p_scseqCMs6_TStemhi_scanpy', 'score_HumanCD45p_scseqCMs6_TSteml_scanpy', 'score_HumanCD45p_scseqCMs6_TStemlo_scanpy', 'score_HumanCD45p_scseqCMs6_TTh1_scanpy', 'score_HumanCD45p_scseqCMs6_TTh17_scanpy', 'score_HumanCD45p_scseqCMs6_TTh2_scanpy', 'score_HumanCD45p_scseqCMs6_Tcd4_scanpy', 'score_HumanCD45p_scseqCMs6_Tcd8_scanpy', 'score_HumanCD45p_scseqCMs6_Tcells_scanpy', 'score_HumanCD45p_scseqCMs6_Tcgd_scanpy', 'score_HumanCD45p_scseqCMs6_Tcytox_scanpy', 'score_HumanCD45p_scseqCMs6_Teff_scanpy', 'score_HumanCD45p_scseqCMs6_Tfh_scanpy', 'score_HumanCD45p_scseqCMs6_TilCM_scanpy', 'score_HumanCD45p_scseqCMs6_Tpexh_scanpy', 'score_HumanCD45p_scseqCMs6_Treg_scanpy', 'score_HumanCD45p_scseqCMs6_Ttexh_scanpy', 'score_HumanCD45p_scseqCMs6_Ubi_scanpy', 'score_HumanCD45p_scseqCMs6_UnivExh_scanpy', 'score_HumanCD45p_scseqCMs6_UnivMem_scanpy', 
'score_HumanCD45p_scseqCMs6_UnivNaive_scanpy', 'score_HumanCD45p_scseqCMs6_aDCs_scanpy', 'score_HumanCD45p_scseqCMs6_allSteml_scanpy', 'score_HumanCD45p_scseqCMs6_cDC1_scanpy', 'score_HumanCD45p_scseqCMs6_cDC2_scanpy', 'score_HumanCD45p_scseqCMs6_cDCs_scanpy', 'score_HumanCD45p_scseqCMs6_epDCs_scanpy', 'score_HumanCD45p_scseqCMs6_general_scanpy', 'score_HumanCD45p_scseqCMs6_moDC_scanpy', 'score_HumanCD45p_scseqCMs6_pDCs_scanpy', 'score_HumanCD45p_scseqCMs6_uDCs_scanpy'
    var: 'ENSEMBL', 'SYMBOL', 'n_cells', 'total_counts', 'frac_reads'
    uns: 'leiden', 'leiden_colors', 'neighbors', 'pca', 'rank_genes_groups', 'umap', 'Type_colors', 'Subtype_colors'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
In [13]:
# UMAPs of three of the computed signature scores:
# T cells, cell cycle and endothelial.
sc.pl.umap(
    adata_with_scores,
    color=[
        'score_HumanCD45p_scseqCMs6_Tcells_scanpy',
        'score_HumanCD45p_scseqCMs6_Cellcycle_scanpy',
        'score_HumanCD45p_scseqCMs6_Endothelial_scanpy',
    ],
    legend_loc='on data',
    legend_fontsize=6,
)
In [14]:
# Immune signatures shipped with besca and the name of the signature set to use.
mymarkers = bc.datasets.load_immune_signatures()
setName = 'HumanCD45p_scseqCMs6'

# fract_pos.gct was exported by BESCA in the standard workflow for the 'leiden'
# labeling; the first two lines of the file are skipped when reading it.
f = pd.read_csv(os.path.join(results_folder, 'labelings', 'leiden', 'fract_pos.gct'), sep="\t", skiprows=2)
df = bc.tl.sig.score_mw(f, mymarkers)

# Median of the '<setName>_Ubi' row scaled by 2/3; passed to make_anno below.
# NOTE(review): presumably used as the score cut-off - confirm in the besca docs.
myc = np.median(df.loc[setName + '_Ubi', :] * 2 / 3)

# Build the signature-based annotation and attach it to adata via the 'leiden' labels.
# NOTE(review): the meaning of the 0.3 argument is not visible here - confirm in the besca docs.
cNames = bc.tl.sig.make_anno(df, myc, setName, f, 0.3)
adata = bc.tl.sig.add_anno(adata, cNames, 'leiden')
In [15]:
# Signature-based annotation columns next to the Leiden clusters.
# First figure: 'cell_names' and 'sscell_group'.
sc.pl.umap(
    adata,
    color=['cell_names', 'sscell_group', 'leiden'],
    legend_loc='on data',
    legend_fontsize=6,
)
# Second figure: 'cell_group' and 'scell_group'.
sc.pl.umap(
    adata,
    color=['cell_group', 'scell_group', 'leiden'],
    legend_loc='on data',
    legend_fontsize=6,
)
... storing 'cell_names' as categorical
... storing 'cell_group' as categorical
... storing 'scell_group' as categorical
... storing 'sscell_group' as categorical
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [16]:
# Curated fine-grained cell type for each Leiden cluster, stored as 'dblabel'.
# Order matters: the list is matched to the clusters by position (shown in the
# trailing comments) - presumably entry i is assigned to cluster i.
new_cluster_names = [
    'CD4-positive, alpha-beta memory T cell',     # 0
    'CD8-positive, alpha-beta cytotoxic T cell',  # 1
    'memory B cell',                              # 2
    'naive T cell',                               # 3
    'IgM or IgA plasma cell',                     # 4
    'regulatory T cell',                          # 5
    'myeloid leukocyte',                          # 6
    'CD8-positive, alpha-beta memory T cell',     # 7
    'naive B cell',                               # 8
    'intestinal epithelial cell',                 # 9
    'IgM or IgA plasma cell',                     # 10
    'IgG plasma cell',                            # 11
    'fibroblast',                                 # 12
    'IgM or IgA plasma cell',                     # 13
    'blood vessel endothelial cell',              # 14
    'intestinal epithelial cell',                 # 15
    'group 3 innate lymphoid cell',               # 16
    'group 1 innate lymphoid cell',               # 17
    'memory T cell',                              # 18
    'proliferating T cell',                       # 19
    'myeloid dendritic cell',                     # 20
    'pericyte cell',                              # 21
    'neural cell',                                # 22
    'HEV endothelial cell',                       # 23
    'plasmacytoid dendritic cell',                # 24
]

bc.tl.annotate_cells_clustering(
    adata=adata,
    clustering_label='leiden',
    new_annotation_label='dblabel',
    new_cluster_labels=new_cluster_names,
)
In [17]:
# Coarse cell type for each Leiden cluster, stored as 'celltype'.
# Order matters: the list is matched to the clusters by position (shown in the
# trailing comments) - presumably entry i is assigned to cluster i.
new_cluster_names = [
    'T cell',             # 0
    'T cell',             # 1
    'B cell',             # 2
    'T cell',             # 3
    'plasma cell',        # 4
    'T cell',             # 5
    'myeloid leukocyte',  # 6
    'T cell',             # 7
    'B cell',             # 8
    'epithelial cell',    # 9
    'plasma cell',        # 10
    'plasma cell',        # 11
    'fibroblast',         # 12
    'plasma cell',        # 13
    'endothelial cell',   # 14
    'epithelial cell',    # 15
    'ILC3',               # 16
    'ILC1',               # 17
    'T cell',             # 18
    'T cell',             # 19
    'cDC',                # 20
    'pericyte cell',      # 21
    'neural cell',        # 22
    'endothelial cell',   # 23
    'pDC',                # 24
]

bc.tl.annotate_cells_clustering(
    adata=adata,
    clustering_label='leiden',
    new_annotation_label='celltype',
    new_cluster_labels=new_cluster_names,
)
In [ ]:
 
In [18]:
# Coarse cell type prefixed with the cluster number, stored as 'cluster_celltype'.
# Order matters: the list is matched to the Leiden clusters by position, so the
# numeric prefix of every entry has to equal its position in the list.
new_cluster_names = [
    '0: T cell',
    '1: T cell',
    '2: B cell',
    '3: T cell',
    '4: plasma cell',
    '5: T cell',
    '6: myeloid leukocyte',
    '7: T cell',
    '8: B cell',
    '9: epithelial cell',
    '10: plasma cell',
    '11: plasma cell',
    '12: fibroblast',
    '13: plasma cell',
    '14: endothelial cell',
    '15: epithelial cell',
    '16: ILC3',
    '17: ILC1',
    '18: T cell',
    '19: T cell',
    '20: cDC',
    '21: pericyte cell',
    '22: neural cell',
    '23: endothelial cell',
    '24: pDC',
]

bc.tl.annotate_cells_clustering(
    adata=adata,
    clustering_label='leiden',
    new_annotation_label='cluster_celltype',
    new_cluster_labels=new_cluster_names,
)
In [ ]:
 
In [19]:
# Export the curated 'dblabel' annotation as an additional labeling. According
# to the log below, this ranks genes per label (wilcoxon) and writes the result
# tables to labelings/dblabel/ inside outdir_data.
adata = bc.st.additional_labeling(
    adata,
    'dblabel',                       # obs column that holds the labels
    'dblabel',                       # labeling name (output sub-folder)
    'Curated celltype annotation.',  # free-text description
    'Klas Hatje',                    # presumably the annotator
    outdir_data,
)
ranking genes
... storing 'dblabel' as categorical
... storing 'celltype' as categorical
... storing 'cluster_celltype' as categorical
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:01:43)
rank genes per label calculated using method wilcoxon.
mapping of cells to  dblabel exported successfully to cell2labels.tsv
average.gct exported successfully to file
fract_pos.gct exported successfully to file
labelinfo.tsv successfully written out
./analyzed/standard_workflow_besca2_0/labelings/dblabel/WilxRank.gct written out
./analyzed/standard_workflow_besca2_0/labelings/dblabel/WilxRank.pvalues.gct written out
./analyzed/standard_workflow_besca2_0/labelings/dblabel/WilxRank.logFC.gct written out
In [20]:
# Export the coarse 'celltype' annotation as an additional labeling. According
# to the log below, this ranks genes per label (wilcoxon) and writes the result
# tables to labelings/celltype/ inside outdir_data.
adata = bc.st.additional_labeling(
    adata,
    'celltype',                     # obs column that holds the labels
    'celltype',                     # labeling name (output sub-folder)
    'Manual celltype annotation.',  # free-text description
    'Klas Hatje',                   # presumably the annotator
    outdir_data,
)
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:01:35)
rank genes per label calculated using method wilcoxon.
mapping of cells to  celltype exported successfully to cell2labels.tsv
average.gct exported successfully to file
fract_pos.gct exported successfully to file
labelinfo.tsv successfully written out
./analyzed/standard_workflow_besca2_0/labelings/celltype/WilxRank.gct written out
./analyzed/standard_workflow_besca2_0/labelings/celltype/WilxRank.pvalues.gct written out
./analyzed/standard_workflow_besca2_0/labelings/celltype/WilxRank.logFC.gct written out
In [21]:
# Export the authors' 'Type' annotation as an additional labeling. According to
# the log below, the labels are read from the 'Type' column and the result
# tables are written to labelings/Martin2019_Type/ inside outdir_data.
adata = bc.st.additional_labeling(
    adata,
    'Type',                                            # obs column that holds the labels
    'Martin2019_Type',                                 # labeling name (output sub-folder)
    'Cell type annotation from authors [Martin2019].', # free-text description
    'Martin et al',                                    # presumably the annotator
    outdir_data,
)
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:01:32)
rank genes per label calculated using method wilcoxon.
mapping of cells to  Type exported successfully to cell2labels.tsv
average.gct exported successfully to file
fract_pos.gct exported successfully to file
labelinfo.tsv successfully written out
./analyzed/standard_workflow_besca2_0/labelings/Martin2019_Type/WilxRank.gct written out
./analyzed/standard_workflow_besca2_0/labelings/Martin2019_Type/WilxRank.pvalues.gct written out
./analyzed/standard_workflow_besca2_0/labelings/Martin2019_Type/WilxRank.logFC.gct written out
In [22]:
# Export the authors' 'Subtype' annotation as an additional labeling. According
# to the log below, the labels are read from the 'Subtype' column and the result
# tables are written to labelings/Martin2019_Subtype/ inside outdir_data.
adata = bc.st.additional_labeling(
    adata,
    'Subtype',                                            # obs column that holds the labels
    'Martin2019_Subtype',                                 # labeling name (output sub-folder)
    'Cell subtype annotation from authors [Martin2019].', # free-text description
    'Martin et al',                                       # presumably the annotator
    outdir_data,
)
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:01:52)
rank genes per label calculated using method wilcoxon.
mapping of cells to  Subtype exported successfully to cell2labels.tsv
average.gct exported successfully to file
fract_pos.gct exported successfully to file
labelinfo.tsv successfully written out
./analyzed/standard_workflow_besca2_0/labelings/Martin2019_Subtype/WilxRank.gct written out
./analyzed/standard_workflow_besca2_0/labelings/Martin2019_Subtype/WilxRank.pvalues.gct written out
./analyzed/standard_workflow_besca2_0/labelings/Martin2019_Subtype/WilxRank.logFC.gct written out
In [ ]:
 
In [ ]:
# Save the annotated AnnData object into the data output folder.
adata.write(
    filename=os.path.join(outdir_data, 'Martin2019_processed.h5ad'),
)